"""
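Generate SQL INSERT statements for the catroll.english_word table from the
American English word lists in /usr/share/dict.

Listing of the input word lists:

ll /usr/share/dict/ameri*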
-rw-r--r-- 1 root root 950K 2018-04-25 07:08:24 /usr/share/dict/american-english
-rw-r--r-- 1 root root 3.4M 2018-04-25 07:08:24 /usr/share/dict/american-english-huge
-rw-r--r-- 1 root root 6.6M 2018-04-25 07:08:24 /usr/share/dict/american-english-insane
-rw-r--r-- 1 root root 1.6M 2018-04-25 07:08:24 /usr/share/dict/american-english-large
-rw-r--r-- 1 root root 458K 2018-04-25 07:08:24 /usr/share/dict/american-english-small
"""

import os
import re

# Accepts only entries made of ASCII letters, digits, spaces and apostrophes.
# Anything else (accented words, hyphenated words, blank lines, ...) is
# printed and skipped.
RE_WORD = re.compile(r"^[a-zA-Z0-9 ']+$")

# Output file; it is recreated from scratch on every run.
sql_file = 'create_database.english_word.insert.sql'
# Word lists to convert; each file name is also written as the `source` value.
files = ('american-english-small', 'american-english', 'american-english-large',
         'american-english-huge', 'american-english-insane')

DICT_DIR = '/usr/share/dict/'
BATCH_SIZE = 100  # rows per generated INSERT statement
INSERT_PREFIX = 'INSERT IGNORE INTO catroll.english_word (word, source) VALUES '


def _insert_statement(words, source):
    """Return one multi-row INSERT statement (newline-terminated).

    Values are embedded between double quotes without escaping. That is safe
    for `words` because RE_WORD admits neither double quotes nor backslashes;
    `source` is embedded verbatim, so it must not contain such characters.
    """
    rows = ', '.join('("%s", "%s")' % (word, source) for word in words)
    return INSERT_PREFIX + rows + ';\n'


def _iter_insert_statements(lines, source, batch_size=BATCH_SIZE):
    """Yield INSERT statements for the accepted words found in `lines`.

    Each line is stripped; lines that do not match RE_WORD are printed to
    stdout and skipped. Accepted words are lower-cased and grouped into
    statements of at most `batch_size` rows.
    """
    batch = []
    for line in lines:
        word = line.strip()
        if not RE_WORD.match(word):
            print(word)  # report the rejected entry
            continue
        batch.append(word.lower())
        if len(batch) >= batch_size:
            yield _insert_statement(batch, source)
            batch = []
    if batch:
        # Trailing partial batch.
        yield _insert_statement(batch, source)


def main(dict_dir=DICT_DIR, out_path=sql_file, sources=files,
         batch_size=BATCH_SIZE):
    """Convert the word lists into a file of batched INSERT statements.

    Args:
        dict_dir: Directory containing the word-list files.
        out_path: SQL file to write; truncated before anything is read.
        sources: File names inside `dict_dir`, processed in order.
        batch_size: Maximum number of rows per INSERT statement.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
        OSError: If a word list or the output file cannot be opened.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')
    # The output is opened once for the whole run, and the two handles have
    # distinct names so the input iterator is never shadowed.
    with open(out_path, 'w', encoding='utf-8') as out:
        for source in sources:
            path = os.path.join(dict_dir, source)
            # Explicit UTF-8: the lists contain accented entries, and decoding
            # them must not depend on the current locale.
            with open(path, encoding='utf-8') as dictionary:
                out.writelines(
                    _iter_insert_statements(dictionary, source, batch_size))


if __name__ == '__main__':
    main()
